from pprint import pprint
import numpy as np
import pandas as pd


def prepare_data(data, verbose=False):
    """Convert a passenger table into a feature matrix and a label column.

    The input frame is left untouched; all work happens on a private copy,
    so the function can be called repeatedly on the same frame and does not
    depend on the frame's index labels.

    Processing steps:

    * ``Name``, ``Ticket``, ``Fare`` and ``Embarked`` are dropped.
    * Missing values are replaced with ``0.0``.
    * ``Sex`` is encoded as ``male -> 1`` and ``female -> 0``; any other
      value is kept as-is and must be convertible to an integer code
      (a missing value therefore ends up as ``0``).
    * ``Age`` is bucketed into ``Age_group``: the number of thresholds
      ``10, 20, ..., 70`` that the age strictly exceeds (``0`` .. ``7``).
    * ``Cabin`` is reduced to its first character (``'0'`` when there is no
      cabin); the distinct characters are sorted and each row receives the
      position of its character in that sorted list.
    * ``Sex``, ``Age_group``, ``Pclass`` and ``Cabin_section`` are one-hot
      encoded and appended, in that order, to ``SibSp`` and ``Parch``.

    NOTE: every one-hot block is ``max(code) + 1`` columns wide and the cabin
    codes depend on which sections occur, so the width of the returned
    matrix depends on the values present in ``data``.

    Args:
        data: ``pandas.DataFrame`` containing the columns ``Name``,
            ``Ticket``, ``Fare``, ``Embarked``, ``Sex``, ``Age``, ``Cabin``,
            ``Pclass``, ``SibSp``, ``Parch`` and ``Survived``.
        verbose: when true, pretty-print the first rows of the working frame
            after each preprocessing stage.

    Returns:
        A ``(train_data, train_labels)`` tuple of float ``numpy`` arrays:
        ``train_data`` has one row per passenger, ``train_labels`` has shape
        ``(n_rows, 1)`` and holds the ``Survived`` values.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if ``data`` has no rows.
    """
    # Discard the features we do not use. ``drop`` returns a new frame, so
    # the caller's DataFrame is never modified.
    data = data.drop(columns=['Name', 'Ticket', 'Fare', 'Embarked'])

    if data.shape[0] == 0:
        raise ValueError('prepare_data() requires at least one row')

    # 'Sex' and 'Cabin' hold text; cast them to object so that the numeric
    # fill value below is accepted whatever string dtype pandas chose.
    data = data.astype({'Sex': object, 'Cabin': object})

    # replacing NaN values with zeros
    data = data.fillna(value=0.0)

    if verbose:
        pprint(data.head())

    # converting the textual 'Sex' feature to a categorical code; values
    # other than 'male'/'female' (e.g. the 0.0 fill value) pass through
    sex_codes = {'male': 1, 'female': 0}
    data['Sex'] = [sex_codes.get(value, value) for value in data['Sex']]

    if verbose:
        pprint(data.head())

    # Age group = how many of the thresholds 10, 20, ..., 70 the age exceeds.
    age_thresholds = np.arange(10, 80, 10)
    ages = data['Age'].to_numpy(dtype=float)
    data['Age_group'] = (ages[:, None] > age_thresholds).sum(axis=1)

    del data['Age']

    if verbose:
        pprint(data.head())

    # First character of the cabin string, '0' when the cabin is unknown
    # (filled with 0.0 above) or empty.
    cabin_letters = [
        cabin[0] if isinstance(cabin, str) and cabin else '0'
        for cabin in data['Cabin']
    ]
    # Code of a section = its rank among the sorted distinct sections.
    section_codes = {
        letter: code for code, letter in enumerate(sorted(set(cabin_letters)))
    }
    data['Cabin_section'] = [section_codes[letter] for letter in cabin_letters]

    del data['Cabin']

    if verbose:
        pprint(data.head())

    # converting categorical features to one-hot encoded features
    pclass = _one_hot(data['Pclass'])
    age_group = _one_hot(data['Age_group'])
    cabin_section = _one_hot(data['Cabin_section'])
    sex = _one_hot(data['Sex'])

    # concatenating our features into one big one-hot encoded vector, sort of
    # descriptor; this will be the input data (format) of our network
    train_data = np.concatenate(
        [data[['SibSp', 'Parch']].values, sex, age_group, pclass, cabin_section],
        axis=1,
    ).astype(float)

    train_labels = data['Survived'].values.astype(float).reshape(-1, 1)

    return train_data, train_labels


def _one_hot(codes):
    """Return a one-hot matrix with ``max(codes) + 1`` columns for *codes*."""
    # Cast first: a column that went through fillna() may be float or object.
    codes = np.asarray(codes).astype(int)
    return np.eye(codes.max() + 1)[codes]
